P21 integration

Integration for GSE149524 P21 pre-analyzed obj rep1 and rep2

load processed obj

merged.list <- list(rep1=readRDS("./GSE149524.P21_rep1.initial.rds"),
                    rep2=readRDS("./GSE149524.P21_rep2.initial.rds"))
merged.list
## $rep1
## An object of class Seurat 
## 17943 features across 3160 samples within 1 assay 
## Active assay: RNA (17943 features, 1500 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
## 
## $rep2
## An object of class Seurat 
## 17800 features across 2406 samples within 1 assay 
## Active assay: RNA (17800 features, 1500 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
color.pre <- c("#8AB6CE","#678BB1","#3975C1","#4CC1BD",
               "#C03778","#97BA59","#DFAB16","#BF7E6B",
               "#D46B35","#BDAE8D","#BD66C4","#2BA956",
               "#FF8080","#FF8080","#FF8080","#FF0000")

filtering

rep1 remove Glia clusters and C15
rep2 remove Glia clusters and C18

no more filtering for other cutoffs (like DoubletFinder0.05/0.1)

levels(merged.list$rep1$seurat_clusters)
##  [1] "0"  "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12" "13" "14"
## [16] "15" "16" "17" "18" "19" "20" "21" "22"
levels(merged.list$rep2$seurat_clusters)
##  [1] "0"  "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12" "13" "14"
## [16] "15" "16" "17" "18" "19" "20" "21"
merged.list$rep1 <- subset(merged.list$rep1, 
                           subset = seurat_clusters %in% setdiff(levels(merged.list$rep1$seurat_clusters),
                                                                 c(16,4,11,20,15)))

merged.list$rep2 <- subset(merged.list$rep2, 
                           subset = seurat_clusters %in% setdiff(levels(merged.list$rep2$seurat_clusters),
                                                                 c(16,3,14,20,18)))
merged.list
## $rep1
## An object of class Seurat 
## 17943 features across 2649 samples within 1 assay 
## Active assay: RNA (17943 features, 1500 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
## 
## $rep2
## An object of class Seurat 
## 17800 features across 2060 samples within 1 assay 
## Active assay: RNA (17800 features, 1500 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap

integration

merged.list <- lapply(merged.list, function(x){
  x@meta.data[,grep("snn|clusters|pANN|Doublet",colnames(x@meta.data))] <- NULL
  x
})


lapply(merged.list, function(x){head(x@meta.data)})
## $rep1
##                    orig.ident nCount_RNA nFeature_RNA percent.mt percent.rb
## AAACCCACAGTCGCAC-1   P21.rep1      26419         5433   4.398350  10.935312
## AAACCCAGTAGGTACG-1   P21.rep1      14427         3814   6.737367  11.908228
## AAACCCATCGCCTCTA-1   P21.rep1      24650         5043   4.933063  12.085193
## AAACGAACATAAGCGG-1   P21.rep1      26933         5341   8.186983  10.841718
## AAACGAAGTCGTCAGC-1   P21.rep1      17679         4748   9.400984   6.736806
## AAACGAATCCTCTAAT-1   P21.rep1      33402         5875   5.182324   8.607269
##                         S.Score   G2M.Score Phase preAnno
## AAACCCACAGTCGCAC-1 -0.071428571  0.23805318   G2M    ENC1
## AAACCCAGTAGGTACG-1 -0.082539683 -0.06026591    G1    ENC7
## AAACCCATCGCCTCTA-1  0.069841270  0.19479582   G2M    ENC2
## AAACGAACATAAGCGG-1 -0.161261261 -0.50418803    G1    ENC2
## AAACGAAGTCGTCAGC-1  0.100686401  0.02331434     S    ENC2
## AAACGAATCCTCTAAT-1 -0.008837409  0.31416904   G2M    ENC2
## 
## $rep2
##                    orig.ident nCount_RNA nFeature_RNA percent.mt percent.rb
## AAACCCAAGTATGAAC-1   P21.rep2      34927         6073   5.849343   8.039626
## AAACCCAAGTCGGGAT-1   P21.rep2      21095         4854   5.370941  11.154302
## AAACCCACATGACTTG-1   P21.rep2      25761         5672   5.054152  10.465432
## AAACGCTAGTGGTCAG-1   P21.rep2      14332         4041   5.484231  14.478091
## AAACGCTGTTGCATGT-1   P21.rep2      33145         6340   9.150701   7.919747
## AAACGCTTCATTGCCC-1   P21.rep2      30833         6123   5.753576   9.447670
##                        S.Score  G2M.Score Phase preAnno
## AAACCCAAGTATGAAC-1  0.09262390  0.4971569   G2M    ENC9
## AAACCCAAGTCGGGAT-1  0.04209209  0.1058824   G2M    ENC2
## AAACCCACATGACTTG-1 -0.06480212  0.2204608   G2M    ENC2
## AAACGCTAGTGGTCAG-1  0.06405666 -0.1081667     S    ENC1
## AAACGCTGTTGCATGT-1  0.06894211  0.1757157   G2M    ENC8
## AAACGCTTCATTGCCC-1  0.07659642  0.0810000   G2M    ENC1

SCT

SCT.list <- merged.list

SCT.list <- lapply(SCT.list, function(x){
  Idents(x) <- "orig.ident"
  x <- SCTransform(x, variable.features.n = 3000, vst.flavor = "v2",
                   return.only.var.genes = FALSE)
  x
})
## vst.flavor='v2' set, setting model to use fixed slope and exclude poisson genes.
## Calculating cell attributes from input UMI matrix: log_umi
## Total Step 1 genes: 16063
## Total overdispersed genes: 13841
## Excluding 2222 genes from Step 1 because they are not overdispersed.
## Variance stabilizing transformation of count matrix of size 16063 by 2649
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 2649 cells
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## Setting estimate of  69 genes to inf as theta_mm/theta_mle < 1e-3
## # of step1 poisson genes (variance < mean): 0
## # of low mean genes (mean < 0.001): 0
## Total # of Step1 poisson genes (theta=Inf; variance < mean): 69
## Total # of poisson genes (theta=Inf; variance < mean): 2222
## Calling offset model for all 2222 poisson genes
## Found 98 outliers - those will be ignored in fitting/regularization step
## Ignoring theta inf genes
## Replacing fit params for 2222 poisson genes by theta=Inf
## Setting min_variance based on median UMI:  0.16
## Second step: Get residuals using fitted parameters for 16063 genes
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |============================                                          |  39%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |========================================                              |  58%
  |                                                                            
  |==========================================                            |  61%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## Computing corrected count matrix for 16063 genes
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |============================                                          |  39%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |========================================                              |  58%
  |                                                                            
  |==========================================                            |  61%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## Calculating gene attributes
## Wall clock passed: Time difference of 29.14117 secs
## Determine variable features
## Place corrected count matrix in counts slot
## Centering data matrix
## Set default assay to SCT
## vst.flavor='v2' set, setting model to use fixed slope and exclude poisson genes.
## Calculating cell attributes from input UMI matrix: log_umi
## Total Step 1 genes: 15902
## Total overdispersed genes: 13735
## Excluding 2167 genes from Step 1 because they are not overdispersed.
## Variance stabilizing transformation of count matrix of size 15902 by 2060
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 2060 cells
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## Setting estimate of  99 genes to inf as theta_mm/theta_mle < 1e-3
## # of step1 poisson genes (variance < mean): 0
## # of low mean genes (mean < 0.001): 0
## Total # of Step1 poisson genes (theta=Inf; variance < mean): 99
## Total # of poisson genes (theta=Inf; variance < mean): 2167
## Calling offset model for all 2167 poisson genes
## Found 132 outliers - those will be ignored in fitting/regularization step
## Ignoring theta inf genes
## Replacing fit params for 2167 poisson genes by theta=Inf
## Setting min_variance based on median UMI:  0.16
## Second step: Get residuals using fitted parameters for 15902 genes
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=======                                                               |   9%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |============================                                          |  41%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |==========================================                            |  59%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |===============================================================       |  91%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## Computing corrected count matrix for 15902 genes
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=======                                                               |   9%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |============================                                          |  41%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |==========================================                            |  59%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |===============================================================       |  91%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## Calculating gene attributes
## Wall clock passed: Time difference of 23.90447 secs
## Determine variable features
## Place corrected count matrix in counts slot
## Centering data matrix
## Set default assay to SCT
SCT.list
## $rep1
## An object of class Seurat 
## 34006 features across 2649 samples within 2 assays 
## Active assay: SCT (16063 features, 3000 variable features)
##  1 other assay present: RNA
##  3 dimensional reductions calculated: pca, tsne, umap
## 
## $rep2
## An object of class Seurat 
## 33702 features across 2060 samples within 2 assays 
## Active assay: SCT (15902 features, 3000 variable features)
##  1 other assay present: RNA
##  3 dimensional reductions calculated: pca, tsne, umap

features

feature.raw <- SelectIntegrationFeatures(object.list = SCT.list, nfeatures = 3000)
head(feature.raw,300)
##   [1] "Gal"           "Tac1"          "Cartpt"        "Npy"          
##   [5] "Vip"           "Calb2"         "Ly6c1"         "Nefl"         
##   [9] "Penk"          "Bglap"         "Cck"           "Scgn"         
##  [13] "Nnat"          "Pcp4l1"        "Ndufa4l2"      "Ntng1"        
##  [17] "Calcb"         "Serpine2"      "Nos1"          "C1ql1"        
##  [21] "Mt3"           "Rprml"         "Grp"           "Mgat4c"       
##  [25] "Ucn3"          "Myl1"          "Pcp4"          "Ass1"         
##  [29] "Cox8b"         "Bglap2"        "Crabp1"        "Nefm"         
##  [33] "Sst"           "Dbh"           "Sdpr"          "Csrp2"        
##  [37] "Etv1"          "Paip2b"        "Cd24a"         "Krt19"        
##  [41] "Vim"           "Serpini1"      "Xist"          "Adgrg6"       
##  [45] "Hspa1a"        "Nmu"           "Tcap"          "Slc17a6"      
##  [49] "Rbp4"          "Sncg"          "Calb1"         "Ebf1"         
##  [53] "Tmeff2"        "S100a4"        "Cpne4"         "Ifitm3"       
##  [57] "Fst"           "Fos"           "Neurod6"       "Basp1"        
##  [61] "Gm42418"       "Camp"          "Cdkn1c"        "Nxph2"        
##  [65] "Apoe"          "Chgb"          "Lgals3"        "Gng11"        
##  [69] "Scg2"          "Pcdh10"        "Adcyap1"       "Epha5"        
##  [73] "Ly6e"          "Cbln2"         "Alcam"         "Cst12"        
##  [77] "A330069E16Rik" "Phlda1"        "Tm4sf4"        "Phgdh"        
##  [81] "Abca9"         "Zfp804a"       "Crip1"         "Skap1"        
##  [85] "Htr3a"         "Id3"           "Sparc"         "Hoxb7"        
##  [89] "AI593442"      "Brinp3"        "Hspb1"         "Cntn5"        
##  [93] "Ntrk3"         "Tspan8"        "Oprk1"         "Avil"         
##  [97] "Tesc"          "Gm13889"       "3110079O15Rik" "Resp18"       
## [101] "Gm13305"       "Jun"           "S100a13"       "S100a6"       
## [105] "S100a11"       "Snca"          "Vstm2a"        "Cckar"        
## [109] "Oas1d"         "Igfbp7"        "Lgals1"        "Cryab"        
## [113] "Gng2"          "Dmkn"          "Htr2b"         "Spock3"       
## [117] "Agrp"          "Ddah1"         "Nog"           "Itm2a"        
## [121] "S100a1"        "Cidea"         "Gad2"          "Id1"          
## [125] "Tshz2"         "Tmc3"          "Prune2"        "Ifi27"        
## [129] "Sag"           "Meg3"          "Thy1"          "Gng8"         
## [133] "Moxd1"         "Nrsn2"         "Rab3b"         "Ctgf"         
## [137] "Pbx3"          "Ifitm2"        "Pkib"          "Galr1"        
## [141] "Rgcc"          "S100a16"       "Kcnk2"         "Timp4"        
## [145] "Fxyd5"         "Id4"           "Nrip3"         "Sparcl1"      
## [149] "Adamts5"       "Cygb"          "Gda"           "Ano2"         
## [153] "Tbx3"          "Id2"           "Satb1"         "Stmn1"        
## [157] "Ptn"           "Klhl1"         "Egr1"          "Gna14"        
## [161] "Fam89a"        "Pcsk1n"        "S100b"         "Tmsb10"       
## [165] "Gpr149"        "Islr2"         "Il11ra1"       "Dapk2"        
## [169] "Snhg11"        "Tes"           "Kcnip4"        "Tcf7l2"       
## [173] "Kcnb2"         "Ngfr"          "Cd79a"         "Sema5a"       
## [177] "Mgll"          "Pdlim2"        "Fam122b"       "Ncald"        
## [181] "Chchd10"       "Anxa2"         "Stxbp6"        "Nrp2"         
## [185] "Pdyn"          "Robo2"         "RP23-407N2.2"  "Efr3a"        
## [189] "Hspa1b"        "Slc18a3"       "Fbln5"         "Dgkg"         
## [193] "Pnoc"          "Cdh9"          "Ifi27l2a"      "Junb"         
## [197] "Nfix"          "Rprm"          "Rab27b"        "Kitl"         
## [201] "Tmem176a"      "AY036118"      "Gm26772"       "Gfra1"        
## [205] "Rerg"          "Prph"          "Peg10"         "Mt1"          
## [209] "Rph3a"         "Abhd11os"      "Grin3a"        "Ngb"          
## [213] "Cnr1"          "Ecel1"         "BC048546"      "A330102I10Rik"
## [217] "Ank2"          "Pcdh9"         "Kctd12"        "Fxyd7"        
## [221] "Prkcdbp"       "Emp3"          "Abi3bp"        "Gch1"         
## [225] "Nt5dc2"        "Tmem35"        "Tnnt2"         "Tubb3"        
## [229] "6330403A02Rik" "Auts2"         "Spink8"        "Brinp2"       
## [233] "P2rx2"         "Tm4sf1"        "Pcsk1"         "Sox4"         
## [237] "Ucp2"          "Gpr37l1"       "9530059O14Rik" "Psd3"         
## [241] "Mrap2"         "Bcl2"          "Tlx2"          "Cpne8"        
## [245] "Cxcl12"        "Cadps2"        "Exosc2"        "Ndst4"        
## [249] "Ptprz1"        "Nupr1"         "Bche"          "Aldh1a3"      
## [253] "Hoxb8"         "Plxna4"        "Map1b"         "Hoxb6"        
## [257] "Chodl"         "Gmpr"          "Hoxa5"         "Snx7"         
## [261] "Gadd45g"       "Vcan"          "6330403K07Rik" "Bdnf"         
## [265] "Gm2694"        "Vsnl1"         "Fxyd1"         "Nxn"          
## [269] "Synpr"         "Pkp3"          "Tmem114"       "Dsc3"         
## [273] "Adm"           "Ifi203"        "Cd9"           "Igfbp4"       
## [277] "Gpsm3"         "Sec11c"        "Slc35d3"       "Adgre1"       
## [281] "Skap2"         "Gadd45b"       "Syt4"          "C1ql2"        
## [285] "Gap43"         "Necab2"        "Enc1"          "Tpd52l1"      
## [289] "Qdpr"          "Arhgap15"      "Timp3"         "Akap7"        
## [293] "Pam"           "Fam19a1"       "Fam19a5"       "Gucy1a3"      
## [297] "Crtac1"        "Sncb"          "Fam46a"        "Malat1"
DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|Mzb1|Vpreb|Lars2|Jun|Fos|Egr1|RP23-|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^AC|^AI|^BC|^Gm|^Hist|Rik$|-ps",
            feature.raw,value = T)

# sex-relaged genes
SRG <- c("Xist","Tsix","Uty","Ddx3y","Eif2s3y","Kdm5d")

CC_gene <- Hmisc::capitalize(tolower(as.vector(unlist(cc.genes.updated.2019))))
DIG
##   [1] "Hspa1a"        "Fos"           "Gm42418"       "A330069E16Rik"
##   [5] "AI593442"      "Hspb1"         "Gm13889"       "3110079O15Rik"
##   [9] "Gm13305"       "Jun"           "Egr1"          "RP23-407N2.2" 
##  [13] "Hspa1b"        "Junb"          "AY036118"      "Gm26772"      
##  [17] "BC048546"      "A330102I10Rik" "6330403A02Rik" "9530059O14Rik"
##  [21] "Gmpr"          "6330403K07Rik" "Gm2694"        "A730017C20Rik"
##  [25] "B930036N10Rik" "Rps24"         "Hist1h2bc"     "2900060B14Rik"
##  [29] "4732456N10Rik" "C730034F03Rik" "A830018L16Rik" "D030055H07Rik"
##  [33] "1500009L16Rik" "Fosb"          "A730046J19Rik" "Gm5424"       
##  [37] "B230312C02Rik" "Hist1h1c"      "Rps29"         "Hspb2"        
##  [41] "Hspb7"         "Rps8"          "E530001K10Rik" "Hist2h4"      
##  [45] "Hist4h4"       "Gm8773"        "Gm26825"       "1810041L15Rik"
##  [49] "Gm26917"       "Rps3a1"        "Hist1h1e"      "Rplp1"        
##  [53] "Gm12216"       "Hspa2"         "Dnajc15"       "Lars2"        
##  [57] "Dnaja1"        "Rps27a"        "Rpl26"         "Hspa5"        
##  [61] "Gm14964"       "1190002N15Rik" "4930550C14Rik" "Rpl23"        
##  [65] "Gm26532"       "Rpl3"          "5330429C05Rik" "Gm38112"      
##  [69] "5730508B09Rik" "Rplp0"         "Gm2990"        "Hsp90b1"      
##  [73] "Gm17750"       "5830473C10Rik" "BC029214"      "Gm1673"       
##  [77] "D930028M14Rik" "Rps4x"         "Hist3h2a"      "Rpsa"         
##  [81] "Gm36266"       "B630019K06Rik" "Rpl21"         "Hist3h2ba"    
##  [85] "Rpl37"         "5033428I22Rik" "Rps18"         "Gm9885"       
##  [89] "Hspb8"         "Gm11837"       "1810058I24Rik" "Rpl13"        
##  [93] "Rps21"         "Rpl32"         "Fosl2"         "Hsp90aa1"     
##  [97] "Rps10"         "Rpl12"         "Rps12"         "RP23-4H17.3"  
## [101] "Hspa4"         "Rpl9"          "5330434G04Rik" "Jund"         
## [105] "Hspa8"         "Gm15800"       "BC030500"      "1700023F06Rik"
## [109] "C530008M17Rik" "Rpl36a"        "2510009E07Rik" "Hist1h4d"     
## [113] "Rps9"          "2810428I15Rik" "Gm20342"       "1700048O20Rik"
## [117] "Rps14"         "BC089491"      "2900011O08Rik" "Rpl39"        
## [121] "AI413582"      "2610001J05Rik" "Rpl27a"        "Rpl8"         
## [125] "Rps5"          "Rpl41"         "Rpl19"         "Rps23"        
## [129] "4933434E20Rik" "Rps3"          "Gm26735"       "Rpl6"         
## [133] "RP23-291B1.2"  "Gm15417"       "Rps20"         "D430019H16Rik"
## [137] "Gm5900"        "4932438A13Rik" "A230050P20Rik" "1700025G04Rik"
## [141] "Rps15a"        "D430042O09Rik" "6430573F11Rik" "Gm4876"       
## [145] "Rpl30"         "2310033P09Rik" "Gm5914"        "Rps19"        
## [149] "Rpl35a"        "BC005561"      "AI314180"      "Rpl17"        
## [153] "Rpl38"         "9330182L06Rik" "9330158H04Rik" "4932422M17Rik"
## [157] "Hsph1"         "Rps7"          "6030419C18Rik" "2010111I01Rik"
## [161] "2610037D02Rik" "Rpl37a"        "A830010M20Rik" "Gm21092"      
## [165] "Gm26710"       "1600020E01Rik" "4921524J17Rik" "Traf3"        
## [169] "Gm10827"       "1500015A07Rik" "Gm26609"       "3110021N24Rik"
## [173] "Gm5148"        "Gm5784"        "D930030I03Rik" "Hsp90ab1"     
## [177] "E130102H24Rik" "Gm37494"       "Gm15050"       "Hist2h2ac"    
## [181] "Rpl10"         "Gm26911"       "6430548M08Rik" "Gm10605"      
## [185] "Gm15879"       "Rpl28"         "BC034090"      "Rps6"         
## [189] "Tra2b"         "1500009C09Rik" "2700081O15Rik" "Gm2464"       
## [193] "2810474O19Rik" "2810004N23Rik" "5930430L01Rik" "A830039N20Rik"
## [197] "Dnajc6"        "Rpl18"         "4930402H24Rik"
CC_gene
##  [1] "Mcm5"     "Pcna"     "Tyms"     "Fen1"     "Mcm7"     "Mcm4"    
##  [7] "Rrm1"     "Ung"      "Gins2"    "Mcm6"     "Cdca7"    "Dtl"     
## [13] "Prim1"    "Uhrf1"    "Cenpu"    "Hells"    "Rfc2"     "Polr1b"  
## [19] "Nasp"     "Rad51ap1" "Gmnn"     "Wdr76"    "Slbp"     "Ccne2"   
## [25] "Ubr7"     "Pold3"    "Msh2"     "Atad2"    "Rad51"    "Rrm2"    
## [31] "Cdc45"    "Cdc6"     "Exo1"     "Tipin"    "Dscc1"    "Blm"     
## [37] "Casp8ap2" "Usp1"     "Clspn"    "Pola1"    "Chaf1b"   "Mrpl36"  
## [43] "E2f8"     "Hmgb2"    "Cdk1"     "Nusap1"   "Ube2c"    "Birc5"   
## [49] "Tpx2"     "Top2a"    "Ndc80"    "Cks2"     "Nuf2"     "Cks1b"   
## [55] "Mki67"    "Tmpo"     "Cenpf"    "Tacc3"    "Pimreg"   "Smc4"    
## [61] "Ccnb2"    "Ckap2l"   "Ckap2"    "Aurkb"    "Bub1"     "Kif11"   
## [67] "Anp32e"   "Tubb4b"   "Gtse1"    "Kif20b"   "Hjurp"    "Cdca3"   
## [73] "Jpt1"     "Cdc20"    "Ttk"      "Cdc25c"   "Kif2c"    "Rangap1" 
## [79] "Ncapd2"   "Dlgap5"   "Cdca2"    "Cdca8"    "Ect2"     "Kif23"   
## [85] "Hmmr"     "Aurka"    "Psrc1"    "Anln"     "Lbr"      "Ckap5"   
## [91] "Cenpe"    "Ctcf"     "Nek2"     "G2e3"     "Gas2l3"   "Cbx5"    
## [97] "Cenpa"
features.filt <- setdiff(feature.raw, c(DIG,CC_gene,SRG))
length(features.filt)
## [1] 2788
head(features.filt,300)
##   [1] "Gal"      "Tac1"     "Cartpt"   "Npy"      "Vip"      "Calb2"   
##   [7] "Ly6c1"    "Nefl"     "Penk"     "Bglap"    "Cck"      "Scgn"    
##  [13] "Nnat"     "Pcp4l1"   "Ndufa4l2" "Ntng1"    "Calcb"    "Serpine2"
##  [19] "Nos1"     "C1ql1"    "Mt3"      "Rprml"    "Grp"      "Mgat4c"  
##  [25] "Ucn3"     "Myl1"     "Pcp4"     "Ass1"     "Cox8b"    "Bglap2"  
##  [31] "Crabp1"   "Nefm"     "Sst"      "Dbh"      "Sdpr"     "Csrp2"   
##  [37] "Etv1"     "Paip2b"   "Cd24a"    "Krt19"    "Vim"      "Serpini1"
##  [43] "Adgrg6"   "Nmu"      "Tcap"     "Slc17a6"  "Rbp4"     "Sncg"    
##  [49] "Calb1"    "Ebf1"     "Tmeff2"   "S100a4"   "Cpne4"    "Ifitm3"  
##  [55] "Fst"      "Neurod6"  "Basp1"    "Camp"     "Cdkn1c"   "Nxph2"   
##  [61] "Apoe"     "Chgb"     "Lgals3"   "Gng11"    "Scg2"     "Pcdh10"  
##  [67] "Adcyap1"  "Epha5"    "Ly6e"     "Cbln2"    "Alcam"    "Cst12"   
##  [73] "Phlda1"   "Tm4sf4"   "Phgdh"    "Abca9"    "Zfp804a"  "Crip1"   
##  [79] "Skap1"    "Htr3a"    "Id3"      "Sparc"    "Hoxb7"    "Brinp3"  
##  [85] "Cntn5"    "Ntrk3"    "Tspan8"   "Oprk1"    "Avil"     "Tesc"    
##  [91] "Resp18"   "S100a13"  "S100a6"   "S100a11"  "Snca"     "Vstm2a"  
##  [97] "Cckar"    "Oas1d"    "Igfbp7"   "Lgals1"   "Cryab"    "Gng2"    
## [103] "Dmkn"     "Htr2b"    "Spock3"   "Agrp"     "Ddah1"    "Nog"     
## [109] "Itm2a"    "S100a1"   "Cidea"    "Gad2"     "Id1"      "Tshz2"   
## [115] "Tmc3"     "Prune2"   "Ifi27"    "Sag"      "Meg3"     "Thy1"    
## [121] "Gng8"     "Moxd1"    "Nrsn2"    "Rab3b"    "Ctgf"     "Pbx3"    
## [127] "Ifitm2"   "Pkib"     "Galr1"    "Rgcc"     "S100a16"  "Kcnk2"   
## [133] "Timp4"    "Fxyd5"    "Id4"      "Nrip3"    "Sparcl1"  "Adamts5" 
## [139] "Cygb"     "Gda"      "Ano2"     "Tbx3"     "Id2"      "Satb1"   
## [145] "Stmn1"    "Ptn"      "Klhl1"    "Gna14"    "Fam89a"   "Pcsk1n"  
## [151] "S100b"    "Tmsb10"   "Gpr149"   "Islr2"    "Il11ra1"  "Dapk2"   
## [157] "Snhg11"   "Tes"      "Kcnip4"   "Tcf7l2"   "Kcnb2"    "Ngfr"    
## [163] "Cd79a"    "Sema5a"   "Mgll"     "Pdlim2"   "Fam122b"  "Ncald"   
## [169] "Chchd10"  "Anxa2"    "Stxbp6"   "Nrp2"     "Pdyn"     "Robo2"   
## [175] "Efr3a"    "Slc18a3"  "Fbln5"    "Dgkg"     "Pnoc"     "Cdh9"    
## [181] "Ifi27l2a" "Nfix"     "Rprm"     "Rab27b"   "Kitl"     "Tmem176a"
## [187] "Gfra1"    "Rerg"     "Prph"     "Peg10"    "Mt1"      "Rph3a"   
## [193] "Abhd11os" "Grin3a"   "Ngb"      "Cnr1"     "Ecel1"    "Ank2"    
## [199] "Pcdh9"    "Kctd12"   "Fxyd7"    "Prkcdbp"  "Emp3"     "Abi3bp"  
## [205] "Gch1"     "Nt5dc2"   "Tmem35"   "Tnnt2"    "Tubb3"    "Auts2"   
## [211] "Spink8"   "Brinp2"   "P2rx2"    "Tm4sf1"   "Pcsk1"    "Sox4"    
## [217] "Ucp2"     "Gpr37l1"  "Psd3"     "Mrap2"    "Bcl2"     "Tlx2"    
## [223] "Cpne8"    "Cxcl12"   "Cadps2"   "Exosc2"   "Ndst4"    "Ptprz1"  
## [229] "Nupr1"    "Bche"     "Aldh1a3"  "Hoxb8"    "Plxna4"   "Map1b"   
## [235] "Hoxb6"    "Chodl"    "Hoxa5"    "Snx7"     "Gadd45g"  "Vcan"    
## [241] "Bdnf"     "Vsnl1"    "Fxyd1"    "Nxn"      "Synpr"    "Pkp3"    
## [247] "Tmem114"  "Dsc3"     "Adm"      "Ifi203"   "Cd9"      "Igfbp4"  
## [253] "Gpsm3"    "Sec11c"   "Slc35d3"  "Adgre1"   "Skap2"    "Gadd45b" 
## [259] "Syt4"     "C1ql2"    "Gap43"    "Necab2"   "Enc1"     "Tpd52l1" 
## [265] "Qdpr"     "Arhgap15" "Timp3"    "Akap7"    "Pam"      "Fam19a1" 
## [271] "Fam19a5"  "Gucy1a3"  "Crtac1"   "Sncb"     "Fam46a"   "Malat1"  
## [277] "Parm1"    "Ier2"     "Nrp1"     "Omp"      "F2r"      "Pmepa1"  
## [283] "Ier3"     "Ltk"      "Sh3gl3"   "Folr1"    "Trp53i11" "Calca"   
## [289] "Ush1c"    "Ifit1"    "Tbx2"     "Scube1"   "Dgkb"     "Clu"     
## [295] "Cdkn1a"   "Serpinf1" "Peg3"     "Zfhx3"    "Adamts1"  "Lypd1"
gc()
##             used   (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells   7393686  394.9   14690761  784.6  13936327  744.3
## Vcells 316899160 2417.8  577580878 4406.6 575970913 4394.4
length(feature.raw)
## [1] 3000
length(features.filt)
## [1] 2788

anchors

all.anchors <- FindIntegrationAnchors(object.list = SCT.list,
                                      dims = 1:50,
                                      anchor.features = features.filt)
## Warning in CheckDuplicateCellNames(object.list = object.list): Some cell names
## are duplicated across objects provided. Renaming to enforce unique cell names.
## Scaling features for provided objects
## Finding all pairwise anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 7143 anchors
## Filtering anchors
##  Retained 5872 anchors
all.anchors
## An AnchorSet object containing 11744 anchors between 2 Seurat objects 
##  This can be used as input to IntegrateData.

Integration

all.anchors <- IntegrateData(anchorset = all.anchors, dims = 1:50)
## Merging dataset 2 into 1
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
all.anchors
## An object of class Seurat 
## 37583 features across 4709 samples within 3 assays 
## Active assay: integrated (2788 features, 2788 variable features)
##  2 other assays present: RNA, SCT
all.anchors@assays$SCT@SCTModel.list
## $model1
## An sctransform model.
##   Model formula:  y ~ log_umi 
##   Parameters stored for 16063 features, 2649 cells.
## 
## $model1.1
## An sctransform model.
##   Model formula:  y ~ log_umi 
##   Parameters stored for 15902 features, 2060 cells.
gc()
##             used   (Mb) gc trigger   (Mb)   max used   (Mb)
## Ncells   7448669  397.9   14690761  784.6   14658615  782.9
## Vcells 487231120 3717.3 1198101146 9140.8 1065493995 8129.1

clustering

all.anchors <- ScaleData(object = all.anchors, verbose = TRUE, 
                         vars.to.regress = c("percent.mt","percent.rb","nCount_RNA"))
## Regressing out percent.mt, percent.rb, nCount_RNA
## Centering and scaling data matrix
gc()
##             used   (Mb) gc trigger   (Mb)   max used   (Mb)
## Ncells   7451367  398.0   14690761  784.6   14690761  784.6
## Vcells 500376573 3817.6 1198101146 9140.8 1065493995 8129.1
length(all.anchors@assays$integrated@var.features)
## [1] 2788
head(all.anchors@meta.data)
##                      orig.ident nCount_RNA nFeature_RNA percent.mt percent.rb
## AAACCCACAGTCGCAC-1_1   P21.rep1      26419         5433   4.398350  10.935312
## AAACCCAGTAGGTACG-1_1   P21.rep1      14427         3814   6.737367  11.908228
## AAACCCATCGCCTCTA-1_1   P21.rep1      24650         5043   4.933063  12.085193
## AAACGAACATAAGCGG-1_1   P21.rep1      26933         5341   8.186983  10.841718
## AAACGAAGTCGTCAGC-1_1   P21.rep1      17679         4748   9.400984   6.736806
## AAACGAATCCTCTAAT-1_1   P21.rep1      33402         5875   5.182324   8.607269
##                           S.Score   G2M.Score Phase preAnno nCount_SCT
## AAACCCACAGTCGCAC-1_1 -0.071428571  0.23805318   G2M    ENC1      22342
## AAACCCAGTAGGTACG-1_1 -0.082539683 -0.06026591    G1    ENC7      20416
## AAACCCATCGCCTCTA-1_1  0.069841270  0.19479582   G2M    ENC2      22207
## AAACGAACATAAGCGG-1_1 -0.161261261 -0.50418803    G1    ENC2      22304
## AAACGAAGTCGTCAGC-1_1  0.100686401  0.02331434     S    ENC2      20482
## AAACGAATCCTCTAAT-1_1 -0.008837409  0.31416904   G2M    ENC2      22264
##                      nFeature_SCT
## AAACCCACAGTCGCAC-1_1         5394
## AAACCCAGTAGGTACG-1_1         3857
## AAACCCATCGCCTCTA-1_1         5018
## AAACGAACATAAGCGG-1_1         5289
## AAACGAAGTCGTCAGC-1_1         4727
## AAACGAATCCTCTAAT-1_1         5616
all.anchors <- RunPCA(all.anchors, do.print = TRUE,
                      features = all.anchors@assays$integrated@var.features,
                      seed.use = 133,
                      npcs = 100,
                      #ndims.print = 1,
                      verbose = T)
## PC_ 1 
## Positive:  Nos1, Rprml, C1ql1, Etv1, Gfra1, Gal, Kitl, Ass1, Stxbp6, Tes 
##     Vip, Fxyd5, Vim, Qdpr, Npy, Cygb, Mfsd4, Dynll1, Ncald, Slc35d3 
##     Cadps2, Cox8b, Arhgap15, Nxn, Enpp1, Kcnab1, Wnt11, Asl, Sh3bgrl3, Spint1 
## Negative:  Slc18a3, Sncb, Chgb, Fam19a5, Syt1, Fam19a1, Rab3b, Hoxb5, Tac1, Myl1 
##     Gfra2, Cpne8, Psd3, Rbfox1, Gpc6, Sphkap, Cd24a, Chd3, Higd1a, Stxbp5l 
##     Id4, Gap43, Pcp4, Ifitm2, Oprk1, Ddah1, Cd81, Casz1, Mt3, Tcf7l2 
## PC_ 2 
## Positive:  Tmeff2, Tmsb10, Serpine2, Zfhx3, Pbx3, Auts2, Avil, Nefl, Mgat4c, Klhl1 
##     Scgn, Ntrk3, Cck, Nefm, Pvrl3, Brinp3, Serpini1, Galnt18, Gnas, Sema5a 
##     March1, Ucn3, Tmem130, Kif26b, Slc17a6, Chst8, Nnat, Amigo2, Id1, Rph3a 
## Negative:  Bub3, Gch1, S100a6, Dmkn, Prnp, Rgmb, Ly6h, S100a16, Oprk1, Csrp1 
##     Necab2, Ptma, Satb1, Mxra7, Tshz2, Syt6, Hoxa5, Itm2b, Tm4sf4, Calb2 
##     Brinp2, Tac1, Nfix, Hmx3, Rgs4, Il11ra1, Syn2, Pmp22, Tcf4, S100a4 
## PC_ 3 
## Positive:  Basp1, Scg2, Tcf7l2, Phox2b, Calb2, Ly6e, Phgdh, Ddah1, Scube1, Meg3 
##     Plxna4, Id2, Adgrg6, P2rx2, Cpne4, Nt5dc2, Nfix, Ifi27, Ano2, Sparc 
##     Krt19, Fez1, Slc4a4, Tshz2, Cryab, Nog, Timp3, Gcgr, S100b, Syt15 
## Negative:  Chchd10, Cox7c, Gda, Epha5, Ndufa4, Ptn, Kcnip4, Ndst4, Cox5a, Htr2b 
##     Stmn1, Ccdc109b, Grem2, Lix1, Cox8a, Atp5g3, Cox4i1, Lin7a, Atp5b, Pgam1 
##     S100a1, Tagln3, Mdh1, Socs2, Gapdh, Atp5o, Nrsn2, Cd200, Tubb2a, Rogdi 
## PC_ 4 
## Positive:  Fgf13, Ank2, Necab1, Rab3c, Adgrg6, Syt15, Gucy1a3, Map1b, Nog, Dapk2 
##     Slc25a48, Lhfpl2, Dpysl3, Cbln2, Ano2, Nmu, Kcnt2, Grin3a, Chl1, Edn1 
##     Htr3b, Cysltr2, Sptbn1, Cnr1, Zeb2, Cd9, Chst15, Layn, Iqgap2, Ndrg2 
## Negative:  Neurod6, Scg5, Bex2, Ebf1, Asb2, Nrip3, Ppp1r14a, Trhde, Efemp1, H2-Q2 
##     Nefl, Resp18, Bex1, Caly, Fam159b, Fam46a, Phox2a, Phlda1, Basp1, Ndn 
##     Spint2, Adcyap1, Galr2, Snca, Tmem59l, Fam131c, Gal, Dbh, Col18a1, Pdlim4 
## PC_ 5 
## Positive:  Nog, Dapk2, Adgrg6, Syt15, Nmu, Edn1, Scn11a, Ano2, Grin3a, Layn 
##     Lhfpl2, Cysltr2, Krt19, Chst15, Slc25a48, Hpca, Npas1, Zfp804a, Gpr149, Cidea 
##     Iqgap2, Cdkn1c, Cbln2, Cpne4, Pcsk1n, Cdh8, Hpcal1, Robo2, Astn2, Lgals1 
## Negative:  Nxph2, Rerg, Fam122b, Pcdh7, Slc17a6, Pbx3, Nefm, Npy5r, Akap13, Klhl1 
##     Csrp2, Nefl, Pcp4l1, Calb1, Lrrfip1, Abca9, Ahi1, Npy1r, Ntng1, Fgfr1 
##     Cckar, Khdrbs3, Akap7, Gna14, Dpp6, Dner, Fjx1, Ddah1, Ush1c, Xpr1
DimHeatmap(all.anchors, dims = 1:12, cells = 1500, balanced = TRUE,ncol = 4)

ElbowPlot(all.anchors, ndims = 100)

ElbowPlot(all.anchors, ndims = 50)

Integrated

all.anchors
## An object of class Seurat 
## 37583 features across 4709 samples within 3 assays 
## Active assay: integrated (2788 features, 2788 variable features)
##  2 other assays present: RNA, SCT
##  1 dimensional reduction calculated: pca
#DefaultAssay(all.anchors) <- "integrated"
PCsct <- 1:35
all.anchors <- FindNeighbors(all.anchors, k.param = 20, dims = PCsct, compute.SNN = T, reduction = 'pca', verbose = T)
## Computing nearest neighbor graph
## Computing SNN
all.anchors <- FindClusters(all.anchors, dims.use = PCsct, algorithm = 1, save.SNN =T, resolution =1.5, reduction = 'pca', verbose = T)
## Warning: The following arguments are not used: dims.use, save.SNN, reduction
## Suggested parameter: dims instead of dims.use
## Warning: The following arguments are not used: dims.use, save.SNN, reduction
## Suggested parameter: dims instead of dims.use
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 4709
## Number of edges: 177059
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7860
## Number of communities: 21
## Elapsed time: 0 seconds
all.anchors <- RunTSNE(object = all.anchors, assay = "integrated", seed.use = 127, dims = PCsct, complexity=50)
all.anchors <- RunUMAP(object = all.anchors, assay = "integrated", seed.use = 112, dims = PCsct, n.neighbors = 20, min.dist = 0.25)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 15:02:19 UMAP embedding parameters a = 1.121 b = 1.057
## 15:02:19 Read 4709 rows and found 35 numeric columns
## 15:02:19 Using Annoy for neighbor search, n_neighbors = 20
## 15:02:19 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 15:02:19 Writing NN index file to temp file C:\Users\Shaorui\AppData\Local\Temp\RtmpyaOKqJ\filef782ca41941
## 15:02:19 Searching Annoy index using 1 thread, search_k = 2000
## 15:02:20 Annoy recall = 100%
## 15:02:20 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 20
## 15:02:21 Initializing from normalized Laplacian + noise (using irlba)
## 15:02:21 Commencing optimization for 500 epochs, with 128572 positive edges
## 15:02:33 Optimization finished
DimPlot(all.anchors, label = T, pt.size = 0.05, repel = F, reduction = 'tsne', group.by = "seurat_clusters") +
DimPlot(all.anchors, label = T, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "seurat_clusters")

all.anchors$preAnno <- factor(as.character(all.anchors$preAnno),
                              levels = paste0("ENC",1:12))
color.pre <- c("#8AB6CE","#678BB1","#3975C1","#4CC1BD",
               "#C03778","#97BA59","#DFAB16","#BF7E6B",
               "#D46B35","#BDAE8D","#BD66C4","#2BA956")
DimPlot(all.anchors, label = T, pt.size = 1, repel = F, reduction = 'tsne', group.by = "preAnno", cols = color.pre) +
DimPlot(all.anchors, label = T, pt.size = 1, repel = T, reduction = 'umap', group.by = "preAnno", label.size = 3.6, cols = color.pre)

further remove low-quality/mix-like C11/14/20

all.anchors <- subset(all.anchors, subset = seurat_clusters %in% setdiff(levels(all.anchors$seurat_clusters),
                                                                         c(11,14,20)))
all.anchors
## An object of class Seurat 
## 37583 features across 4419 samples within 3 assays 
## Active assay: integrated (2788 features, 2788 variable features)
##  2 other assays present: RNA, SCT
##  3 dimensional reductions calculated: pca, tsne, umap
DimPlot(all.anchors, label = T, pt.size = 0.05, repel = F, reduction = 'tsne', group.by = "seurat_clusters") +
DimPlot(all.anchors, label = T, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "seurat_clusters")

DimPlot(all.anchors, label = T, pt.size = 1, repel = F, reduction = 'tsne', group.by = "preAnno", cols = color.pre) +
DimPlot(all.anchors, label = T, pt.size = 1, repel = T, reduction = 'umap', group.by = "preAnno", label.size = 3.6, cols = color.pre)

DimPlot(all.anchors, label = F, pt.size = 0.4, repel = F, reduction = 'umap', group.by = "orig.ident", cols = ggsci::pal_startrek()(2)) +
DimPlot(all.anchors, label = T, pt.size = 1, repel = T, reduction = 'umap', group.by = "preAnno", label.size = 3.6, cols = color.pre)

annotation

all.anchors$sort_clusters <- factor(as.character(all.anchors$seurat_clusters),
                                    levels = c(1,4, # ENC1
                                               0,9,5,3,   #  ENC2,3
                                               17,    # ENC4
                                               18,    # ENC5
                                               15,    # ENC6
                                               8,     # ENC7
                                               6,2,7,12,    # ENC8,9
                                               10,    # ENC10
                                               16,     # ENC11
                                               19,13   # ENC12
                                               ))
all.anchors$Anno1 <- as.character(all.anchors$seurat_clusters)

all.anchors$Anno1[all.anchors$Anno1 %in% c(1,4)] <- "ENC1"
all.anchors$Anno1[all.anchors$Anno1 %in% c(0,9)] <- "ENC2"
all.anchors$Anno1[all.anchors$Anno1 %in% c(5,3)] <- "ENC3"
all.anchors$Anno1[all.anchors$Anno1 %in% c(17)] <- "ENC4"
all.anchors$Anno1[all.anchors$Anno1 %in% c(18)] <- "ENC5"
all.anchors$Anno1[all.anchors$Anno1 %in% c(15)] <- "ENC6"
all.anchors$Anno1[all.anchors$Anno1 %in% c(8)] <- "ENC7"
all.anchors$Anno1[all.anchors$Anno1 %in% c(6,2)] <- "ENC8"
all.anchors$Anno1[all.anchors$Anno1 %in% c(7,12)] <- "ENC9"
all.anchors$Anno1[all.anchors$Anno1 %in% c(10)] <- "ENC10"
all.anchors$Anno1[all.anchors$Anno1 %in% c(16)] <- "ENC11"
all.anchors$Anno1[all.anchors$Anno1 %in% c(19,13)] <- "ENC12"

all.anchors$Anno1 <- factor(all.anchors$Anno1,
                            levels = paste0("ENC",1:12))
all.anchors$Anno2 <- as.character(all.anchors$seurat_clusters)

all.anchors$Anno2[all.anchors$Anno2 %in% c(1,4)] <- "EMN1"
all.anchors$Anno2[all.anchors$Anno2 %in% c(0,9)] <- "EMN2"
all.anchors$Anno2[all.anchors$Anno2 %in% c(5)] <- "EMN3"
all.anchors$Anno2[all.anchors$Anno2 %in% c(3)] <- "EMN4"
all.anchors$Anno2[all.anchors$Anno2 %in% c(17)] <- "EMN5"

all.anchors$Anno2[all.anchors$Anno2 %in% c(6,2)] <- "IMN1"
all.anchors$Anno2[all.anchors$Anno2 %in% c(7,12)] <- "IMN2"

all.anchors$Anno2[all.anchors$Anno2 %in% c(10)] <- "IN1"
all.anchors$Anno2[all.anchors$Anno2 %in% c(16)] <- "IN2"
all.anchors$Anno2[all.anchors$Anno2 %in% c(18)] <- "IN3"

all.anchors$Anno2[all.anchors$Anno2 %in% c(15)] <- "IPAN1"
all.anchors$Anno2[all.anchors$Anno2 %in% c(8)] <- "IPAN2"
all.anchors$Anno2[all.anchors$Anno2 %in% c(19)] <- "IPAN3"
all.anchors$Anno2[all.anchors$Anno2 %in% c(13)] <- "IPAN4"

all.anchors$Anno2 <- factor(all.anchors$Anno2,
                            levels = c(paste0("EMN",1:5),
                                       paste0("IMN",1:2),
                                       paste0("IN",1:3),
                                       paste0("IPAN",1:4)))
DimPlot(all.anchors, label = F, pt.size = 0.4, repel = F, reduction = 'umap', group.by = "orig.ident", cols = ggsci::pal_startrek()(2)) +
DimPlot(all.anchors, label = T, pt.size = 1, repel = T, reduction = 'umap', group.by = "Anno1", label.size = 3.6, cols = color.pre)

DimPlot(all.anchors, label = T, pt.size = 0.4, repel = F, reduction = 'umap', group.by = "seurat_clusters") +
DimPlot(all.anchors, label = T, pt.size = 1, repel = T, reduction = 'umap', group.by = "Anno1", label.size = 3.6, cols = color.pre)

DimPlot(all.anchors, label = T, pt.size = 0.4, repel = F, reduction = 'umap', group.by = "seurat_clusters") + NoLegend() +
DimPlot(all.anchors, label = T, pt.size = 1, repel = T, reduction = 'umap', group.by = "Anno2", label.size = 3.6)

markers

figure2

check.fig2 <- list(Neurotransmission=c("Oprk1","Adrb2","Cckar","Htr2b",
                                       "Gucy2g","Galr1","Vipr2","Grin3a",
                                       "Adora1","Crhr2","Chrna2","Tacr3",
                                       "Gabrg3","Nmur2","Grm5","P2ry6",
                                       "Galr2","Sstr2","Gabre","Npy5r",
                                       "Npy1r","Sstr5"#,"Drd2","Drd3"
                                       ),
                   Othersignaling=c("Cxcl12","Fgfr2","Ntrk2","Egfr",
                                    "Nmbr","Ptgfr","Pgf","Edn1",
                                    "Kit","Prokr2","Islr2","Gfral",
                                    "Mc4r","Bdnf","Kitl","Gfra1",
                                    "Tgfbr2","Ednra","Prokr1","Bmpr1b"),
                   Ionchannels=c("Kcns3","Ano10","Kcnip4","Kcnip1",
                                 "Kcnn2","Kcnn3","Ano2","Ano8",
                                 "Kcna2","Scn1a","Kcna5","Kcnab1",
                                 "Cacna1i","Kcnd2","Kcnv1",
                                 "Cacng5","Piezo2","Piezo1"),   # Peizo1 manually added 
                   Adhesionmolecules=c("Ly6c1","Itgb5","Sema3e","Ntn1",
                                       "Slitrk4","Itga6","Cdh8","Ptpru",
                                       "Itgb6","Unc5b","Avil","Sema5a",
                                       "Epha8","Cdh7","Itga1","Ephb6",
                                       "Flrt2","Nxph2","Ntng1"),
                   Transcriptionfactors=c("Satb1","Ebf3","Bnc2","Nfatc1",
                                          "Zfp503","Satb2","Cux2","Dlx3",
                                          "Atoh8","Zfp804a","Ebf2","Pbx3",
                                          "Meis1","Etv1","St18","Ebf1",
                                          "Neurod6","Trps1","Zfp800","Onecut2",
                                          "Phox2a","Phox2b"),
                   AnnexinsCopines=c("Anxa4","Anxa3","Anxa11","Anxa2",
                                     "Anxa5","Anxa6","Anxa7","Cpne4",
                                     "Cpne5","Cpne7","Cpne2","Cpne3",
                                     "Cpne8"))
names.fig2 <- names(check.fig2)
check.fig2
## $Neurotransmission
##  [1] "Oprk1"  "Adrb2"  "Cckar"  "Htr2b"  "Gucy2g" "Galr1"  "Vipr2"  "Grin3a"
##  [9] "Adora1" "Crhr2"  "Chrna2" "Tacr3"  "Gabrg3" "Nmur2"  "Grm5"   "P2ry6" 
## [17] "Galr2"  "Sstr2"  "Gabre"  "Npy5r"  "Npy1r"  "Sstr5" 
## 
## $Othersignaling
##  [1] "Cxcl12" "Fgfr2"  "Ntrk2"  "Egfr"   "Nmbr"   "Ptgfr"  "Pgf"    "Edn1"  
##  [9] "Kit"    "Prokr2" "Islr2"  "Gfral"  "Mc4r"   "Bdnf"   "Kitl"   "Gfra1" 
## [17] "Tgfbr2" "Ednra"  "Prokr1" "Bmpr1b"
## 
## $Ionchannels
##  [1] "Kcns3"   "Ano10"   "Kcnip4"  "Kcnip1"  "Kcnn2"   "Kcnn3"   "Ano2"   
##  [8] "Ano8"    "Kcna2"   "Scn1a"   "Kcna5"   "Kcnab1"  "Cacna1i" "Kcnd2"  
## [15] "Kcnv1"   "Cacng5"  "Piezo2"  "Piezo1" 
## 
## $Adhesionmolecules
##  [1] "Ly6c1"   "Itgb5"   "Sema3e"  "Ntn1"    "Slitrk4" "Itga6"   "Cdh8"   
##  [8] "Ptpru"   "Itgb6"   "Unc5b"   "Avil"    "Sema5a"  "Epha8"   "Cdh7"   
## [15] "Itga1"   "Ephb6"   "Flrt2"   "Nxph2"   "Ntng1"  
## 
## $Transcriptionfactors
##  [1] "Satb1"   "Ebf3"    "Bnc2"    "Nfatc1"  "Zfp503"  "Satb2"   "Cux2"   
##  [8] "Dlx3"    "Atoh8"   "Zfp804a" "Ebf2"    "Pbx3"    "Meis1"   "Etv1"   
## [15] "St18"    "Ebf1"    "Neurod6" "Trps1"   "Zfp800"  "Onecut2" "Phox2a" 
## [22] "Phox2b" 
## 
## $AnnexinsCopines
##  [1] "Anxa4"  "Anxa3"  "Anxa11" "Anxa2"  "Anxa5"  "Anxa6"  "Anxa7"  "Cpne4" 
##  [9] "Cpne5"  "Cpne7"  "Cpne2"  "Cpne3"  "Cpne8"
lapply(check.fig2, length)
## $Neurotransmission
## [1] 22
## 
## $Othersignaling
## [1] 20
## 
## $Ionchannels
## [1] 18
## 
## $Adhesionmolecules
## [1] 19
## 
## $Transcriptionfactors
## [1] 22
## 
## $AnnexinsCopines
## [1] 13
names.fig2
## [1] "Neurotransmission"    "Othersignaling"       "Ionchannels"         
## [4] "Adhesionmolecules"    "Transcriptionfactors" "AnnexinsCopines"
DefaultAssay(all.anchors) <- "SCT"
cowplot::plot_grid(
DotPlot(all.anchors, features = check.fig2$Neurotransmission, group.by = "sort_clusters",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 11.5))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[1]),

DotPlot(all.anchors, features = check.fig2$Othersignaling, group.by = "sort_clusters",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+#scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[2]),

DotPlot(all.anchors, features = check.fig2$Ionchannels, group.by = "sort_clusters",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[3]),

DotPlot(all.anchors, features = check.fig2$Adhesionmolecules, group.by = "sort_clusters",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[4]),

DotPlot(all.anchors, features = check.fig2$Transcriptionfactors, group.by = "sort_clusters",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[5]),

DotPlot(all.anchors, features = check.fig2$AnnexinsCopines, group.by = "sort_clusters",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[6]),
ncol = 2)

cowplot::plot_grid(
DotPlot(all.anchors, features = check.fig2$Neurotransmission, group.by = "preAnno",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 11.5))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[1]),

DotPlot(all.anchors, features = check.fig2$Othersignaling, group.by = "preAnno",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+#scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[2]),

DotPlot(all.anchors, features = check.fig2$Ionchannels, group.by = "preAnno",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[3]),

DotPlot(all.anchors, features = check.fig2$Adhesionmolecules, group.by = "preAnno",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[4]),

DotPlot(all.anchors, features = check.fig2$Transcriptionfactors, group.by = "preAnno",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[5]),

DotPlot(all.anchors, features = check.fig2$AnnexinsCopines, group.by = "preAnno",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[6]),
ncol = 2)

cowplot::plot_grid(
DotPlot(all.anchors, features = check.fig2$Neurotransmission, group.by = "Anno1",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 11.5))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[1]),

DotPlot(all.anchors, features = check.fig2$Othersignaling, group.by = "Anno1",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+#scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[2]),

DotPlot(all.anchors, features = check.fig2$Ionchannels, group.by = "Anno1",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[3]),

DotPlot(all.anchors, features = check.fig2$Adhesionmolecules, group.by = "Anno1",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[4]),

DotPlot(all.anchors, features = check.fig2$Transcriptionfactors, group.by = "Anno1",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[5]),

DotPlot(all.anchors, features = check.fig2$AnnexinsCopines, group.by = "Anno1",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[6]),
ncol = 2)

cowplot::plot_grid(
DotPlot(all.anchors, features = check.fig2$Neurotransmission, group.by = "Anno2",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 11.5))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[1]),

DotPlot(all.anchors, features = check.fig2$Othersignaling, group.by = "Anno2",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+#scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[2]),

DotPlot(all.anchors, features = check.fig2$Ionchannels, group.by = "Anno2",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[3]),

DotPlot(all.anchors, features = check.fig2$Adhesionmolecules, group.by = "Anno2",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[4]),

DotPlot(all.anchors, features = check.fig2$Transcriptionfactors, group.by = "Anno2",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[5]),

DotPlot(all.anchors, features = check.fig2$AnnexinsCopines, group.by = "Anno2",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[6]),
ncol = 2)

#saveRDS(all.anchors, "./GSE149524.P21.integration_Anno.s.rds")
#all.anchors <- readRDS("./GSE149524.P21.integration_Anno.S.rds")
cowplot::plot_grid(
DotPlot(all.anchors, features = c(check.fig2$Neurotransmission,"Gal"), group.by = "Anno2",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 11.5))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[1]),

DotPlot(all.anchors, features = check.fig2$Othersignaling, group.by = "Anno2",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+#scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[2]),

DotPlot(all.anchors, features = check.fig2$Ionchannels, group.by = "Anno2",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[3]),

DotPlot(all.anchors, features = check.fig2$Adhesionmolecules, group.by = "Anno2",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[4]),

DotPlot(all.anchors, features = check.fig2$Transcriptionfactors, group.by = "Anno2",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[5]),

DotPlot(all.anchors, features = check.fig2$AnnexinsCopines, group.by = "Anno2",
        cols = c("lightgrey","blue")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[6]),
ncol = 2)